


#####################################################
###### 04_inferential_analyses
###### Regressions and other inferential analyses
#####################################################



rm(list = ls())
library(dplyr)
library(here)
library(stargazer)
library(xtable)

## source plotting theme
source(here("code/00_utils.R"))

## labels
race_cov_labels <- c("2+ or other",
                     "Asian",
                     "Black non-Hispanic",
                     "Hispanic")
race_labels_wref <- sprintf("%s\n(ref: non-Hispanic white)", race_cov_labels)


educ_cov_labels <- c("High school or less", 
                     "Some college")
educ_labels_wref <- sprintf("%s\n(ref: college+)", educ_cov_labels)


ideo_cov_labels <- c("Moderate", "Conservative")
ideo_labels_wref <- sprintf("%s\n(ref: liberal)", ideo_cov_labels)


income_cov_labels <- c("Income: <$30,000",
                       "Income: $30-$60,000",
                       "Income: $60-$100,000")
income_labels_wref <- sprintf("%s\n(ref: Income >$100,000)", income_cov_labels)

parent_cov_labels = c("Current K-12 parent", "Ever K-12 parent")
parent_labels_wref <- sprintf("%s\n(ref: Never parent)", parent_cov_labels)


partisan_cov_labels <- c("Independent", "Republican", "Unknown")
partisan_labels_wref <- sprintf("%s\n(ref: Democrat)", partisan_cov_labels)



#####################################################
###### Load data and filter to parents only 
#####################################################

## Load data
df_all <- read.csv(here("data/analytic_df.csv"))

## repeat for parents 
df_analytic <- df_all %>% filter(!derived_parent %in% c("Never parent", "Other"))



#####################################################
###### Main text: education gradients among parents
#####################################################

all_statusquo <- unique(df_analytic$derived_statusquo_cond)

## educ gradients
reg_educdiff <- lapply(all_statusquo, 
                       function(x) 
                         glm(derived_other_morefair ~ relevel(factor(derived_educ_3cat), 
                                                              ref = "College or\nprofessional school"),
                             data = df_analytic %>% filter(derived_statusquo_cond == x),
                             family = stats::quasibinomial(link = "logit"),
                             weights = df_analytic %>% filter(derived_statusquo_cond == x) %>%
                               pull(derived_comb_weight)))
stargazer(reg_educdiff, column.labels = all_statusquo,
          covariate.labels = educ_labels_wref,
          font.size = "scriptsize",
          star.cutoffs = c(0.05, 0.01, 0.001),
          dep.var.caption = "Dependent variable: yes status quo is fairer than algorithm",
          dep.var.labels = "Status quo method",
          report = "vcsp*",
          model.numbers = FALSE,
          title = "Regression results: cleavages among parents by educational attainment (positive coefficient = more likely to rate the non-algorithmic method as more fair)")


#####################################################
###### Main text: ideological differences among parents
#####################################################

## pol ideology
reg_ideodiff <- lapply(all_statusquo, 
                       function(x) 
                         glm(derived_other_morefair ~ relevel(factor(derived_polideo_category), 
                                                              ref = "Slightly - extremely liberal"),
                             data = df_analytic %>% filter(derived_statusquo_cond == x),
                             family = stats::quasibinomial(link = "logit"),
                             weights = df_analytic %>% filter(derived_statusquo_cond == x) %>%
                               pull(derived_comb_weight)))


stargazer(reg_ideodiff, column.labels = all_statusquo,
          covariate.labels = ideo_labels_wref,
          star.cutoffs = c(0.05, 0.01, 0.001),
          report = "vcsp*",
          model.numbers = FALSE,
          dep.var.caption = "Dependent variable: yes status quo is fairer than algorithm",
          dep.var.labels = "Status quo method",
          title = "Regression results: cleavages among parents by political ideology (positive coefficient = more likely to rate the non-algorithmic method as more fair)",
          font.size = "scriptsize")


#####################################################
###### Main text: change after bias update 
#####################################################

## run pooling status quo conditions 
educ_sticky_reg <- lm(derived_changeview ~ relevel(factor(derived_educ_3cat), 
                                                   ref = "College or\nprofessional school"),
                      data = df_analytic %>% filter(derived_alg_morefair),
                      weights = df_analytic %>% filter(derived_alg_morefair) %>%
                        pull(derived_comb_weight))
ideology_sticky_reg <- lm(derived_changeview ~ relevel(factor(derived_polideo_category), 
                                                       ref = "Slightly - extremely liberal"),
                          data = df_analytic %>% filter(derived_alg_morefair),
                          weights = df_analytic %>% filter(derived_alg_morefair) %>%
                            pull(derived_comb_weight))
race_sticky_reg <- lm(derived_changeview ~ relevel(factor(derived_race), 
                                                   ref = "White NH"),
                      data = df_analytic %>% filter(derived_alg_morefair),
                      weights = df_analytic %>% filter(derived_alg_morefair) %>%
                        pull(derived_comb_weight))
income_sticky_reg <- lm(derived_changeview ~ relevel(factor(derived_income), 
                                                     ref = "Income: >$100,000+"),
                        data = df_analytic %>% filter(derived_alg_morefair),
                        weights = df_analytic %>% filter(derived_alg_morefair) %>%
                          pull(derived_comb_weight))
dem_cov_labels <- c(race_labels_wref, educ_labels_wref, income_labels_wref, ideo_labels_wref)
stargazer(race_sticky_reg, 
          educ_sticky_reg, 
          income_sticky_reg,
          ideology_sticky_reg, 
          report = "vcsp*", 
          model.numbers = FALSE,
          covariate.labels = dem_cov_labels,
          star.cutoffs = c(0.05, 0.01, 0.001),
          dep.var.caption = "Dependent variable: recommends against algorithm",
          dep.var.labels = "",
          title = "Among respondents who rated an algorithm as fairer, who recommends against an algorithm's use after learning about algorithmic bias? Positive = more likely to turn against
algorithms",
          column.labels = c("Race/ethnicity", "Educ.", "Income", "Ideology"),
          font.size = "scriptsize")


#####################################################
###### Supplement: overall ratings of algorithms versus other methods 
#####################################################

## get each status quo condition
all_statusquo = unique(df_analytic$derived_statusquo_cond)

## do test of diff in proportions comparing prop select alg as more fair,
## prop who select other
all_compareprop = lapply(all_statusquo, compare_statusquo_alg_binary, full_data = df_analytic)
names(all_compareprop) = all_statusquo

prop_allrespondents_pvalues <- lapply(all_compareprop, function(x) process_proptest(x)) %>%  
  bind_rows(, .id = "which_method") %>%
  mutate(p = ifelse(p < 0.001, 
                    "p < 0.001",
                    as.character(p)))

### repeat for parents only
all_compareprop_paronly = lapply(all_statusquo, compare_statusquo_alg_binary, 
                                 full_data = df_analytic %>%
                                   filter(!derived_parent %in% c("Other", "Never parent")))
names(all_compareprop_paronly) = all_statusquo

prop_parents_pvalues <- lapply(all_compareprop_paronly, function(x) process_proptest(x)) %>%  
  bind_rows(, .id = "which_method") %>%
  mutate(p = ifelse(p < 0.001, 
                    "p < 0.001",
                    as.character(p)))

## main views
colnames(prop_allrespondents_pvalues) <- c("Other method", "Prop alg. fairer",
                                           "Prop. other method fairer",
                                           "p value")

print(xtable(prop_allrespondents_pvalues,
             caption = "P-values from two-tailed test of differences of proportion; proportions are from binary rating of each method versus algorithms as more fair (parents)",
             label = "tab:fullsamplep",
             align = "|p{3cm}|p{3cm}|p{3cm}|p{3cm}|p{3cm}|",
             size = "small"),
      type = "latex", booktabs = TRUE,
      include.rownames = FALSE,
      file = here("output/tables/parent_proptest.tex"),
      caption.placement = "top")



#####################################################
###### Supplement: differences between parents and non-parents
#####################################################

df_parentcomparison <- df_all %>% filter(derived_parent != "Other")
reg_pardiff <- lapply(all_statusquo, 
                       function(x) 
                         glm(derived_other_morefair ~ relevel(factor(derived_parent), 
                                                              ref = "Never parent"),
                             data = df_parentcomparison %>% filter(derived_statusquo_cond == x),
                             family = stats::quasibinomial(link = "logit"),
                             weights = df_parentcomparison %>% filter(derived_statusquo_cond == x) %>%
                               pull(derived_comb_weight)))

stargazer(reg_pardiff, column.labels = all_statusquo,
          covariate.labels = parent_labels_wref,
          star.cutoffs = c(0.05, 0.01, 0.001),
          report = "vcsp*",
          model.numbers = FALSE,
          dep.var.caption = "Dependent variable: yes status quo is fairer than algorithm",
          dep.var.labels = "Status quo method",
          title = "Regression results: differences by parenting status (positive coefficient = more likely to rate the non-algorithmic method as more fair)",
          font.size = "scriptsize")


#####################################################
###### Supplement: race/ethnicity differences among parents
#####################################################

reg_racediff <- lapply(all_statusquo, 
                       function(x) 
                         glm(derived_other_morefair ~ relevel(factor(derived_race), 
                                                              ref = "White NH"),
                             data = df_analytic %>% filter(derived_statusquo_cond == x),
                             family = stats::quasibinomial(link = "logit"),
                             weights = df_analytic %>% filter(derived_statusquo_cond == x) %>%
                               pull(derived_comb_weight)))


stargazer(reg_racediff, column.labels = all_statusquo,
          covariate.labels = race_labels_wref,
          star.cutoffs = c(0.05, 0.01, 0.001),
          report = "vcsp*",
          model.numbers = FALSE,
          dep.var.caption = "Dependent variable: yes status quo is fairer than algorithm",
          dep.var.labels = "Status quo method",
          title = "Regression results: cleavages among parents by race/ethnicity (positive coefficient = more likely to rate the non-algorithmic method as more fair)",
          font.size = "scriptsize")

#####################################################
###### Supplement: partisan differences among parents
#####################################################
### examine partisanship
reg_partisandiff <- lapply(all_statusquo, 
                           function(x) 
                             glm(derived_other_morefair ~ relevel(factor(derived_pol), 
                                                                  ref = "Democrat"),
                                 data = df_analytic %>% filter(derived_statusquo_cond == x),
                                 family = stats::quasibinomial(link = "logit"),
                                 weights = df_analytic %>% filter(derived_statusquo_cond == x) %>%
                                   pull(derived_comb_weight)))


stargazer(reg_partisandiff, column.labels = all_statusquo,
          dep.var.caption = "Dependent variable: yes status quo is fairer than algorithm",
          dep.var.labels = "Status quo method",
          covariate.labels = partisan_labels_wref,
          star.cutoffs = c(0.05, 0.01, 0.001),
          report = "vcsp*",
          model.numbers = FALSE,
          title = "Regression results: cleavages among parents by partisanship (positive coefficient = more likely to rate the non-algorithmic method as more fair)",
          font.size = "scriptsize")


#####################################################
###### Supplement: income patterns 
#####################################################

reg_incomediff <- lapply(all_statusquo, 
                           function(x) 
                             glm(derived_other_morefair ~ relevel(factor(derived_income), 
                                                                  ref = "Income: >$100,000+"),
                                 data = df_analytic %>% filter(derived_statusquo_cond == x),
                                 family = stats::quasibinomial(link = "logit"),
                                 weights = df_analytic %>% filter(derived_statusquo_cond == x) %>%
                                   pull(derived_comb_weight)))


stargazer(reg_incomediff, column.labels = all_statusquo,
          dep.var.caption = "Dependent variable: yes status quo is fairer than algorithm",
          dep.var.labels = "Status quo method",
          covariate.labels = income_labels_wref,
          star.cutoffs = c(0.05, 0.01, 0.001),
          report = "vcsp*",
          model.numbers = FALSE,
          title = "Regression results: cleavages among parents by income strata (positive coefficient = more likely to rate the non-algorithmic method as more fair)",
          font.size = "scriptsize")


#####################################################
###### Supplement: interaction between education and ideology
#####################################################

reg_ideology_education <- lapply(all_statusquo, 
                       function(x) 
                         glm(derived_other_morefair ~ relevel(factor(derived_polideo_category), 
                                                              ref = "Slightly - extremely liberal")*
                                                  relevel(factor(derived_educ_3cat), 
                                                  ref = "College or\nprofessional school"),
                             data = df_analytic %>% filter(derived_statusquo_cond == x),
                             family = stats::quasibinomial(link = "logit"),
                             weights = df_analytic %>% filter(derived_statusquo_cond == x) %>%
                               pull(derived_comb_weight)))


stargazer(reg_ideology_education, column.labels = all_statusquo,
          font.size = "scriptsize",
          star.cutoffs = c(0.05, 0.01, 0.001),
          dep.var.caption = "Dependent variable: yes status quo is fairer than algorithm",
          dep.var.labels = "Status quo method",
          report = "vcsp*",
          model.numbers = FALSE,
          title = "Regression results: interaction between ideology and educational attainment")


#####################################################
###### Supplement: balance among status quo methods 
#####################################################

reg_covar_balance <- lapply(all_statusquo, 
              function(x) 
                glm(formula(sprintf("binary_dv ~ 
                derived_polideo_category +
                derived_educ_3cat +
                derived_income +
                derived_race", x)),
                data = df_analytic %>% mutate(binary_dv = derived_statusquo_cond == x),
                family = stats::quasibinomial(link = "logit"),
                 weights = df_analytic %>%
                pull(derived_comb_weight)))

stargazer(reg_covar_balance, column.labels = all_statusquo,
          font.size = "scriptsize",
          star.cutoffs = c(0.05, 0.01, 0.001),
          dep.var.caption = "Dependent variable: randomization to that status quo method",
          dep.var.labels = "Status quo method",
          report = "vcsp*",
          model.numbers = FALSE,
          title = "Balance within each status quo condition")


#####################################################
###### Supplement: school racial/ethnic composition 
#####################################################

reg_schoolcomp <- lapply(all_statusquo, 
                                 function(x) 
                          glm(derived_other_morefair ~ 
                            relevel(factor(derived_schoolcomp_cond),
                                    ref = "Integrated"),
                          data = df_analytic %>% filter(derived_statusquo_cond == x),
                          family = stats::quasibinomial(link = "logit"),
                          weights = df_analytic %>% filter(derived_statusquo_cond == x) %>%
                          pull(derived_comb_weight)))


stargazer(reg_schoolcomp, 
          column.labels = all_statusquo,
          covariate.labels = c("Majority Black and Hispanic\n(ref: Integrated)",
                               "Majority white\n(ref: Integrated)"),
          font.size = "scriptsize",
          star.cutoffs = c(0.05, 0.01, 0.001),
          dep.var.caption = "Dependent variable: yes status quo is fairer than algorithm",
          dep.var.labels = "Status quo method",
          report = "vcsp*",
          model.numbers = FALSE,
          title = "Regression results: differences by school composition")


#####################################################
###### Supplement: qualitative patterns 
#####################################################

# This workflow repeats the data cleaning steps in 03_descriptive_graphs.R 

## Load hand-coded qualitative responses
combined_qual <- read.csv(here("data/fr_cleaned_10212025.csv"))

## Remove observations where the answer was nonsensical or "I don't know"
combined_qual_valid <- combined_qual %>% filter(coded. == "Yes") %>%
  mutate(condition = gsub("\\_algfairer|\\_otherfairer", "", limited_cell),
         choice = ifelse(grepl("algfairer", limited_cell), "Algorithm", 
                         "Status quo method")) %>%
  mutate(inadequate_response_tocode_bin = ifelse(inadequate_response_tocode == "Yes", 1, 0),
         valence_impersonalgood_combined = case_when(inadequate_response_tocode_bin == 1 | 
                                                       salience_impersonal_personal == "Not salient" ~ "N/A",
                                                     valence_impersonal_v_personal == "Other (salient but unclear)" ~ "Other (salient but unclear)",
                                                     valence_impersonal_v_personal %in%
                                                       c("Impersonal is good",
                                                         "Personal is bad",
                                                         "Personal is bad + impersonal is good") ~ "Impersonal is good",
                                                     TRUE ~ "Impersonal is bad"),
         valence_targetgood_combined = case_when(inadequate_response_tocode_bin == 1 | 
                                                   salience_targeting == "Not salient" ~ "N/A",
                                                 valence_targeted_v_not == "Other (salient but unclear)" ~ "Other (salient but unclear)", 
                                                 valence_targeted_v_not == "More targeting is good" ~ "More targeting is good", 
                                                 TRUE ~ "Less targeting is good")) %>%
  # Also create binary version 
  mutate(valence_impersonalgood_bin = ifelse(valence_impersonalgood_combined == "Impersonal is good",
                                             TRUE, FALSE),
         valence_targetgood_bin = ifelse(valence_targetgood_combined == "More targeting is good",
                                         TRUE, FALSE))

# Load LLM coded observations and combine with hand coded observations 
# Restricted to ones with valid responses  
llm_coded_in <- readRDS(here("data/llm_results_unlab.RDS")) %>%
  # Align variables with handcoded ones 
  mutate(salience_targeting = ifelse(salience_target_prediction == 1, 
                                     "Salient", "Not salient"),
         salience_impersonal_personal = ifelse(salience_impersonal_prediction == 1, 
                                               "Salient", "Not salient"),
         valence_impersonalgood_bin = ifelse(valence_impersonal_prediction == "Impersonal is good",
                                             TRUE, FALSE),
         valence_targetgood_bin = ifelse(valence_target_prediction == "More targeting is good",
                                         TRUE, FALSE)) %>%
  # Join on choice and condition 
  left_join(df_analytic %>%
              select(CaseId, derived_alg_morefair,
                     derived_statusquo_cond), by = "CaseId") %>%
  rename(condition = derived_statusquo_cond,
         valence_impersonalgood_combined = valence_impersonal_prediction,
         valence_targetgood_combined = valence_target_prediction) %>%
  mutate(choice = ifelse(derived_alg_morefair, "Algorithm", "Status quo method"))

# Bind together hand coded and LLM-coded observations
llmhand_qual_valid <- bind_rows(llm_coded_in, combined_qual_valid) %>%
  select(CaseId, condition, choice,
         salience_targeting, salience_impersonal_personal,
         valence_targetgood_bin, valence_impersonalgood_bin,
         valence_targetgood_combined, valence_impersonalgood_combined) %>%
  # Merge on demographic attributes 
  left_join(df_analytic, by = "CaseId")

# Run models 
polideo_valence <- glm(valence_impersonalgood_bin ~ relevel(factor(derived_polideo_category),
                                                               ref = "Slightly - extremely liberal"),
                       data = llmhand_qual_valid %>% filter(condition %in% c("Counselor discretion", "Parent requests")))

educ_valence <- glm(valence_impersonalgood_bin ~ relevel(factor(derived_educ_3cat),
                                                            ref = "College or\nprofessional school"),
                    data = llmhand_qual_valid %>% filter(condition %in% c("Counselor discretion", "Parent requests")))

re_valence <- glm(valence_impersonalgood_bin ~ relevel(factor(derived_race), 
                                                          ref = "White NH"),
                  data = llmhand_qual_valid %>% filter(condition %in% c("Counselor discretion", "Parent requests")))

race_labels <- c("2+ or other (ref: non-Hispanic white)",
                 "Asian (ref: non-Hispanic white)",
                 "Black non-Hispanic (ref: non-Hispanic white)",
                 "Hispanic (ref: non-Hispanic white)")
educ_labels <-  c("High school or less (ref: college+)", 
                  "Some college (ref: college+)")
pol_labels <- c("Moderate (ref: liberal)", "Conservative (ref: liberal)")

stargazer(re_valence, educ_valence, polideo_valence, 
          type = "latex",
          title = "Attaches positive valence to impersonal forms of knowledge: qualitative coding subsample; restricted to counselor discretion and parent request conditions (positive coefficient = rates impersonal knowledge as more positive)",
          label= "tab:valence_personalvimpersonal",
          covariate.labels = c(race_labels, educ_labels, pol_labels),
          out = here("output/tables/parent_valenceratings.tex"),
          star.cutoffs = c(0.05, 0.01, 0.001),
          report = "vcsp*",
          font.size = "scriptsize")


